#########################################################################   
#                                 INFO                                  #   
#########################################################################  

  # PROJECT: Gender quotas in Tunisia's 2018 municipal elections
  # PURPOSE: Analysis for mayorships
  # CREATED: September 2020 by Julia Clark
  # INPUTS:  quotas_mun_clean.R, quotas_list_clean.R,
  # OUTPUTS: graphs and tables

#########################################################################   
#               SETUP                                                   #   
######################################################################### 

  ######## ENVIRONMENT
  
  rm(list = ls()) 
  setwd("~/Desktop/replication_what_men_want/")
  
  ######## PACKAGES
  
  need <- c("openxlsx", "tidyr", "dplyr", "ggplot2", "stargazer", "viridis", 
            "multiwayvcov", "lmtest", "modelsummary",  "gmodels", "sjPlot", 
            "ggpubr") 
  have <- need %in% rownames(installed.packages()) 
  if(any(!have)) install.packages(need[!have]) 
  invisible(lapply(need, library, character.only=T)) 
  
  
  ######## SET ALL TABLES TO SHOW "NA" BY DEFAULT
  
  table = function (..., useNA = 'ifany') base::table(..., useNA = useNA)  
  
  ######## CLUSTERING FUNCTIONS
  
  f.test <- function(model, cluster){
    vcovCL<-cluster.vcov(model, cluster)
    ftest <- waldtest(model, vcov = vcovCL, test = "F")
    return(ftest)
  } 
  
  cluster.se <- function(model, cluster){
    coeftest(model, vcov=function(x) cluster.vcov(x, cluster))[,2]
  }

#########################################################################   
#               GGPLOT PARAMETERS                                       #   
#########################################################################    

  # Change default plot margins so not called as too big
  #par(mar=c(1,1,1,1))
  
  # Font sizes
  bold14 <- element_text(face = "bold", color = "black", size = 14)
  bold16 <- element_text(face = "bold", color = "black", size = 16)
  bold18 <- element_text(face = "bold", color = "black", size = 18)
  bold20 <- element_text(face = "bold", color = "black", size = 20)
  greyitalic18 <- element_text(face = "italic", color = "grey40", size = 18)
  
  # Continuous axes
  cy <- scale_y_continuous(expand = c(0,0))
  cx <- scale_x_continuous(expand = c(0,0)) 
  
  # Colors 
  vid_low <- "#481567FF"
  vid_med <- "#29AF7FFF"
  vid_high <- "#FDE725FF"
  
  # General theme
  lecs_theme <- theme_bw() + 
    theme(text = element_text(size = 20),
          title = bold18, 
          plot.subtitle = greyitalic18,
          axis.title = bold16, 
          legend.title = bold16,
          plot.caption = element_text(face = "italic", color = "grey40", size = 16,
                                      hjust = 0, margin = margin(t = 15, r = 0, b = 0,
                                                                 l = 0)),
          axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
          axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0),
                                      angle = 0, vjust = .5),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank()
    )
  
  # Theme with lines for coeff plots
  lecs_theme_lines <- theme_bw() + 
    theme(text = element_text(size = 18),
          title = bold18, 
          plot.subtitle = greyitalic18,
          axis.title = bold14, 
          legend.title = bold14,
          plot.caption = element_text(face = "italic", color = "grey40", size = 16,
                                      hjust = 0, margin = margin(t = 15, r = 0, b = 0,
                                                                 l = 0)),
          axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
          axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0),
                                      angle = 0, vjust = .5),
          panel.grid.minor = element_blank(),
          panel.background = element_blank()
    )

#########################################################################   
#               LOAD DATA                                               #   
#########################################################################  

  # Municipal and list-level formatted data
  load("data/quotas_mun_clean.RData")
  load("data/quotas_list_clean.RData")
  
#########################################################################   
#               CLEAN DATA                                               #   
#########################################################################   
  
  # Create performance variable
  list <- list %>%
    mutate(Performance = factor(ifelse(votes_per_rank == 1, "First",
                                          ifelse(votes_per_rank == 2, "Second",
                                                 ifelse(votes_per_rank == 3, "Third", "Fourth or lower"))),
                                levels = c("First", "Second", "Third", "Fourth or lower")))
  
  # Create performance variable
  list <- list %>%
    mutate(Performance_rev = factor(ifelse(votes_per_rank == 1, "First",
                                        ifelse(votes_per_rank == 2, "Second",
                                               ifelse(votes_per_rank == 3, "Third", "Fourth or lower"))),
                                 levels = c("Fourth or lower", "Third", "Second", "First")))
  
  # Standardize 2018 votes per
  list <- mutate(list, votes_per = (votes_per - mean(votes_per, na.rm = T))/sd(votes_per,na.rm = T))
  
  # Make data frame with selected variables
  list.comp <- list %>%
    dplyr::select(FHL, won_mayor, votes_per, first_place, votes_per_rank, n_lists_2018, 
                  list_class_ennahdha,  n_lists_2018, Performance,Performance_rev, 
                  list_seats_n, gov_en, mun_en, votes_per, u_id, sd_votes_per_2014_parl, above_mean_2014_parl,
                  sd_log_pop, sd_emp_unemploy_rate, sd_pop_com_per, sd_turnout_per_pop_f_2014_parl) %>%
    mutate(list_class_ennahdha = factor(list_class_ennahdha)) 
  nrow(list.comp) # 2074
  
  # Order mayor list_party by number of mayorships won
  nmay <- dplyr::count(mun, party = mayor_party_short) %>% arrange(n)
  mun <- mutate(mun, mayor_party_short = factor(mayor_party_short, levels = c(nmay$party)))
  
  # Create variable for if list was first place, and won mayor
  list <- list %>%
    mutate(first_won = ifelse(first_place == 1 & won_mayor == 1, 1, 0))
  
  
#########################################################################   
#               SUMMARY STATS                                           #   
#########################################################################    

  ##### TABLE OF MAYORS BY LIST RANK
  
  # 72/350 = 20.5 Percent of first placed lists that are women
  list %>% dplyr::select(FHL, first_place) %>%
    group_by(FHL) %>%
    dplyr::count(first_place)
  
  # 68/350 = 19.4 Percent of mayors lists that are women
  list %>% dplyr::select(FHL, won_mayor) %>%
    group_by(FHL) %>%
    dplyr::count(won_mayor)
  
  # For MHLs whose list came first, 65% won mayor (182/(96+182))
  # For FHLs whose list came first, 61% won mayor (44/(28+44))
  list %>% dplyr::select(FHL, won_mayor, first_place) %>%
    group_by(FHL, first_place) %>%
    dplyr::count(won_mayor)
  
  # Lists by party: 1055 + 159 = 1214 party/coalition lists, 860 independent
  list %>% dplyr::count(list_type)
  list %>% dplyr::count(list_class_ennahdha)

  ##### MAYOR GENDER BY LIST TYPE
  
  # E=32%, N = 25%, T = 30%, I = 0%
  CrossTable(mun$mayor_list_type, mun$mun_mayor_fem)
  chisq.test(table(mun$mayor_list_type, mun$mun_mayor_fem))
 
   
  ##### LIST HEADS AND WINNERS BY LIST TYPE
  
  # Create var for party vs. independent
  list <- mutate(list, list_type2 = ifelse(list_type == "independent", "Independent", "Party"))
  
  # Summarize by party and list head and mayor gender
  head <- dplyr::select(list, FHL, list_type2) %>% 
    group_by(list_type2, FHL) %>%
    dplyr::count() %>%
    mutate(var = "Heads of lists") %>%
    dplyr::rename(List = list_type2,
                  Gender = FHL)
  wnrs <- dplyr::select(list, FHL, first_place, list_type2) %>%
    filter(first_place == 1) %>%
    group_by(list_type2, FHL) %>%
    dplyr::count() %>%
    mutate(var = "Heads of first place lists") %>%
    dplyr::rename(List = list_type2,
                  Gender = FHL)
  myrs <- dplyr::select(list, FHL, won_mayor, list_type2) %>%
    filter(won_mayor == 1) %>%
    group_by(list_type2, FHL) %>%
    dplyr::count() %>%
    mutate(var = "Won mayor") %>%
    dplyr::rename(List = list_type2,
                  Gender = FHL)
  
  # Add rows for independent female first place and mayors (0)
  wnrs[4,1] <- "Independent"
  wnrs[4,2] <- 1
  wnrs[4,3] <- 0
  wnrs[4,4] <- "Heads of first place lists"
  
  myrs[4,1] <- "Independent"
  myrs[4,2] <- 1
  myrs[4,3] <- 0
  myrs[4,4] <- "Won mayor"
  
  # Merge and format
  wnrsmyrs <- rbind(wnrs,myrs) %>% # head
    dplyr::mutate(List = gsub("Independent", "Independent", List),
                  List = factor(List, levels = c("Party", "Independent")),
                  var = factor(var, levels = c("Heads of first place lists","Won mayor"))) %>% # "Won mayor"
    dplyr::group_by(var, List)  %>%
    dplyr::mutate(total = sum(n),
                  percent = n/total)
  
  # Graph 1: Raw totals
  ggplot(wnrsmyrs, aes(List, n, fill = as.factor(Gender))) +
    geom_bar(position = "dodge", stat = "identity") +
    facet_wrap(~ var, scales = "fixed" ) +
    labs(x = "List type", y = "Count", fill = "Gender") +
    # geom_text(stat ='identity', aes(label = n),
    #           position = position_dodge(width = 1), vjust = -.9,
    #           color = "black", size = 4.5) +
    scale_fill_manual(values = c(vid_med, vid_low), 
                      labels = c("Male", "Female")) + 
    scale_y_continuous(expand = c(0,0)) +
    lecs_theme_lines + theme(axis.text.x = element_text(size = 12),
                             legend.text=element_text(size=12)) 
  
  # Merge and format
  headswnrs <- rbind(head, wnrs,myrs) %>% # myrs
    dplyr::mutate(List = gsub("Independent", "Independent", List),
                  List = factor(List, levels = c("Party", "Independent")),
                  var = factor(var, levels = c("Heads of lists", "Heads of first place lists","Won mayor"))) %>% # "Won mayor"
    dplyr::group_by(var, List)  %>%
    dplyr::mutate(total = sum(n),
                  percent = n/total)
  
  
  #########################################################################   
  #                 FIGURES/TABLES FOR THE PAPER                          #   
  ######################################################################### 
  
  ###################################################################################   
  #           GENDER OF LIST HEAD, FIRST PLACE LIST, AND MAYOR  (FIGURE 1)          #   
  ###################################################################################
  
  plot_head <- ggplot(head, aes(List, n, fill = as.factor(Gender))) +
    geom_bar(position = "dodge", stat = "identity") +
    labs(title = "Heads of lists",
         x = "List type", y = "Count", fill = "Gender") +
    # geom_text(stat ='identity', aes(label = n),
    #           position = position_dodge(width = 1), vjust = -.9,
    #           color = "black", size = 4.5) +
    scale_fill_manual(values = c(vid_med, vid_low), 
                      labels = c("Male", "Female")) + 
    scale_y_continuous(limits=c(0,850), expand = c(0,0)) +
    lecs_theme_lines + theme(title = element_text(size = 14),
                             axis.text.x = element_text(size = 12),
                             legend.text=element_text(size=12)) 
  
  plot_head
  
  plot_wnrs <- ggplot(wnrs, aes(List, n, fill = as.factor(Gender))) +
    geom_bar(position = "dodge", stat = "identity") +
    labs(title = "Heads of first-place lists",
         x = "List type", y = "", fill = "Gender") +
    # geom_text(stat ='identity', aes(label = n),
    #           position = position_dodge(width = 1), vjust = -.9,
    #           color = "black", size = 4.5) +
    scale_fill_manual(values = c(vid_med, vid_low), 
                      labels = c("Male", "Female")) + 
    scale_y_continuous(limits=c(0,185), expand = c(0,0)) +
    lecs_theme_lines + theme(title = element_text(size = 14),
                             axis.text.x = element_text(size = 12),
                             legend.text=element_text(size=12)) 
  
  plot_wnrs
  
  plot_myrs <- ggplot(myrs, aes(List, n, fill = as.factor(Gender))) +
    geom_bar(position = "dodge", stat = "identity") +
    labs(title = "Mayors",
         x = "List type", y = "", fill = "Gender") +
    # geom_text(stat ='identity', aes(label = n),
    #           position = position_dodge(width = 1), vjust = -.9,
    #           color = "black", size = 4.5) +
    scale_fill_manual(values = c(vid_med, vid_low), 
                      labels = c("Male", "Female")) + 
    scale_y_continuous(limits=c(0,185), expand = c(0,0)) +
    lecs_theme_lines + theme(title = element_text(size = 14),
                             axis.text.x = element_text(size = 12),
                             legend.text=element_text(size=12)) 
  
  plot_myrs
  
  comb_heads <- ggarrange(plot_head, plot_wnrs, plot_myrs,
                          labels = c("", ""), 
                          ncol = 3, nrow = 1,
                          common.legend = T,
                          legend = "bottom")
  comb_heads
  ggsave("figures/list_heads_winners_count2.pdf", 
         plot = last_plot(), limitsize = F, width = 14, height = 5)  
  

  
#########################################################################   
#         PREDICTING WHO WINS THE MAYORSHIP - [TABLE 2]                 #   
#########################################################################      
  
  # Hypothesis: All else-equal, female list heads will be less likely to
  # be elected mayor than their male counterparts. 
  
  
  ##### OLS MODELS
  
  # Create list of model names
  mayor_rr <- list()
  
  # Run models
  mayor_rr[[1]] <- lm(won_mayor ~ FHL + Performance_rev  +  list_class_ennahdha + n_lists_2018  + gov_en, data = list.comp)
  
  mayor_rr[[2]] <- lm(won_mayor ~ FHL * Performance_rev  +  list_class_ennahdha + n_lists_2018 + gov_en, data = list.comp)
  
  names(mayor_rr) <- c("All lists", "All lists")
  
  stargazer(mayor_rr, type = "text", omit = ("gov_en"))
  
  
  # Cluster standard errors by municipality
  mayor.rr.ols.cl <- list()
  mayor.rr.ols.cl[[1]] <- cluster.se(mayor_rr[[1]], list.comp$u_id)
  mayor.rr.ols.cl[[2]] <- cluster.se(mayor_rr[[2]], list.comp$u_id)

  
  # Calculate new F-stat with clustering
  check.f <- list()
  check.f[[1]] <- f.test(mayor_rr[[1]], list.comp$u_id)
  check.f[[2]] <- f.test(mayor_rr[[2]], list.comp$u_id)

  f <- NA
  p <- NA
  for(i in 1:length(check.f)){
    f[i] <- round(check.f[[i]]$F[2],3)
    p[i] <- round(check.f[[i]]$`Pr(>F)`[2],4)
  }
  fstars <- data.frame(fval = f, pval = p, stars = c("***","***")) %>%
    mutate(fstars = paste(fval, stars, sep = ""))
  
  # Output table
  stargazer(mayor_rr[[1]], mayor_rr[[2]],
            out = "tables/mayors_rr_ols.tex",
            title = "Selection of mayors by rank and gender",
            label = "mayor.ols",
            table.placement = "H",
            type = "text", 
            column.labels = names(mayor_rr),
            dep.var.labels = "Probability of being elected mayor",
            order = c("FHL","Performance_revFirst","Performance_revSecond","Performance_revThird",
                     "FHL:Performance_revFirst","FHL:Performance_revSecond","FHL:Performance_revThird"), 
            covariate.labels = c( "FHL",
                                  "FHL x Third place",
                                  "FHL x Second place",
                                  "FHL x First place",
                                  "First place",
                                  "Second place",
                                  "Third place",
                                  "Constant"),
            se = mayor.rr.ols.cl, 
            omit.stat = "f", 
            omit = c("gov_en", "list_class_ennahdha", "n_lists_2018"),
            font.size = "scriptsize",
            add.lines = list(c("Controls", "Y", "Y"),
                             c("Governorate FE", "Y", "Y"),
                             c("F Statistic", fstars$fstars)),
            notes.align = "l",
            notes = c("OLS models with standard errors clustered by municipality. ",
                      "Controls include party type, the number of lists running in",
                      "the municipality, and governorate fixed effects. Performance",
                      "is the rank that a given list finished in the 2018 elections.",
                      "The baseline for Performance are lists that finished in fourth",
                      "place or worse.")
  ) 
  
  
  
  ##### LOGIT MODELS
  
  # Create list of model names
  mayor.rr.log <- list()
  
  # Run models
  mayor.rr.log[[1]] <- glm(won_mayor ~ FHL + Performance_rev  +  list_class_ennahdha + n_lists_2018 + gov_en, data = list.comp, 
                        family = "binomial")
  mayor.rr.log[[2]] <- glm(won_mayor ~ FHL *  Performance_rev  + list_class_ennahdha + n_lists_2018 + gov_en, data = list.comp, 
                        family = "binomial")
  names(mayor.rr.log) <- c("All lists","All lists")
  
  # Cluster standard errors by municipality
  mayor.rr.log.cl <- list()
  mayor.rr.log.cl[[1]] <- cluster.se(mayor.rr.log[[1]], list.comp$u_id)
  mayor.rr.log.cl[[2]] <- cluster.se(mayor.rr.log[[2]], list.comp$u_id)

  # Output table
  stargazer(mayor.rr.log[[1]],mayor.rr.log[[2]],
            out = "tables/mayors_rr_log.tex",
            title = "Selection of mayors by rank and gender",
            label = "mayor.log",
            table.placement = "H",
            type = "text", 
            column.labels = names(mayor.rr.log),
            dep.var.labels = "Probability of being elected mayor",
            order = c("FHL","Performance_revFirst","Performance_revSecond","Performance_revThird",
                      "FHL:Performance_revFirst","FHL:Performance_revSecond","FHL:Performance_revThird"), 
            covariate.labels = c( "FHL",
                                  "FHL x Third place",
                                  "FHL x Second place",
                                  "FHL x First place",
                                  "First place",
                                  "Second place",
                                  "Third place",
                                  "Constant"),
            se = mayor.rr.log.cl, 
            omit = c("gov_en", "list_class_ennahdha", "n_lists_2018"),
            font.size = "scriptsize",
            add.lines = list(c("Controls", "Y", "Y"),
                             c("Governorate FE", "Y", "Y")),
            notes.align = "l",
            notes = c("Logit model with standard errors clustered by municipality. ",
                      "Controls include party type, the number of lists running in",
                      "the municipality, and governorate fixed effects. Performance",
                      "is the rank that a given list finished in the 2018 elections.",
                      "The baseline for Performance are lists that finished in fourth",
                      "place or worse.")
  )
  
  
  